library(tidyverse)
library(lubridate)
library(plotly)
library(gganimate)
library(transformr)
# Download the global confirmed-case table from the CSSEGISandData/COVID-19
# repository.
time_series_confirmed_long <- read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv"))
# Give the two location columns syntactic names, then stack every column other
# than the location/coordinate columns into Date / Confirmed pairs.
time_series_confirmed_long <- time_series_confirmed_long %>%
  rename(
    Province_State = "Province/State",
    Country_Region = "Country/Region"
  ) %>%
  pivot_longer(
    cols = -c(Province_State, Country_Region, Lat, Long),
    names_to = "Date",
    values_to = "Confirmed"
  )
## Parsed with column specification:
## cols(
## .default = col_double(),
## `Province/State` = col_character(),
## `Country/Region` = col_character()
## )
## See spec(...) for full column specifications.
# Download the global deaths table from the same repository.
time_series_deaths_long <- read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv"))
# Same reshaping as for the confirmed table: rename the location columns and
# stack the remaining columns into Date / Deaths pairs.
time_series_deaths_long <- time_series_deaths_long %>%
  rename(
    Province_State = "Province/State",
    Country_Region = "Country/Region"
  ) %>%
  pivot_longer(
    cols = -c(Province_State, Country_Region, Lat, Long),
    names_to = "Date",
    values_to = "Deaths"
  )
## Parsed with column specification:
## cols(
## .default = col_double(),
## `Province/State` = col_character(),
## `Country/Region` = col_character()
## )
## See spec(...) for full column specifications.
# Join confirmed and death counts into one long table.
# Both tables carry the same identifying columns, so join on them directly
# rather than pasting them into a "."-separated surrogate key: a pasted key
# can collide when a field itself contains the separator, and building it
# overwrote both input tables. Rows with a missing Province_State still match
# each other because dplyr joins treat NA as equal to NA by default.
time_series_long_joined <- full_join(
  time_series_confirmed_long,
  # Lat/Long already come from the confirmed table; take only the join
  # columns and the Deaths measure from the deaths table.
  time_series_deaths_long %>%
    select(Province_State, Country_Region, Date, Deaths),
  by = c("Province_State", "Country_Region", "Date")
)
# Parse the month/day/year strings in Date into Date objects
time_series_long_joined <- time_series_long_joined %>%
  mutate(Date = mdy(Date))
# Build the report table: every column other than the location, coordinate and
# date columns is stacked into Report_Type (former column name) / Counts pairs.
time_series_long_joined_counts <- pivot_longer(
  time_series_long_joined,
  cols = -c(Province_State, Country_Region, Lat, Long, Date),
  names_to = "Report_Type",
  values_to = "Counts"
)
# Plot US deaths over time to a PDF output file
pdf("images/time_series_example_plot.pdf", width = 6, height = 3)
# A ggplot object is only drawn when it is printed. Print it explicitly so the
# file is not left empty when this code runs without top-level auto-printing
# (e.g. via source() or inside a function).
print(
  time_series_long_joined %>%
    # Keep only the US rows before aggregating; the other countries' sums
    # were computed and then thrown away.
    filter(Country_Region == "US") %>%
    group_by(Country_Region, Date) %>%
    # across() replaces the superseded summarise_at()
    summarise(across(c(Confirmed, Deaths), sum), .groups = "drop") %>%
    ggplot(aes(x = Date, y = Deaths)) +
    geom_point() +
    geom_line() +
    ggtitle("US COVID-19 Deaths")
)
# Close the PDF device so the file is written out
dev.off()
## png
## 2
# Plot US deaths over time to a PNG output file
# Pixels per inch: the device size below is 6 x 6 inches at this resolution
ppi <- 300
png("images/time_series_example_plot.png", width = 6 * ppi, height = 6 * ppi, res = ppi)
# A ggplot object is only drawn when it is printed. Print it explicitly so the
# file is not left empty when this code runs without top-level auto-printing
# (e.g. via source() or inside a function).
print(
  time_series_long_joined %>%
    # Keep only the US rows before aggregating; the other countries' sums
    # were computed and then thrown away.
    filter(Country_Region == "US") %>%
    group_by(Country_Region, Date) %>%
    # across() replaces the superseded summarise_at()
    summarise(across(c(Confirmed, Deaths), sum), .groups = "drop") %>%
    ggplot(aes(x = Date, y = Deaths)) +
    geom_point() +
    geom_line() +
    ggtitle("US COVID-19 Deaths")
)
# Close the PNG device so the file is written out
dev.off()
## png
## 2
US COVID-19 Deaths
# Interactive (plotly) version of the US deaths plot
ggplotly(
  time_series_long_joined %>%
    # Keep only the US rows before aggregating; the other countries' sums
    # were computed and then thrown away.
    filter(Country_Region == "US") %>%
    group_by(Country_Region, Date) %>%
    # across() replaces the superseded summarise_at()
    summarise(across(c(Confirmed, Deaths), sum), .groups = "drop") %>%
    ggplot(aes(x = Date, y = Deaths)) +
    geom_point() +
    geom_line() +
    ggtitle("US COVID-19 Deaths")
)
# Same interactive plot, built in named steps: aggregate, plot, convert.
# Per-date Confirmed and Deaths sums for the US only.
US_deaths <- time_series_long_joined %>%
  # Filter first so only the US rows are aggregated
  filter(Country_Region == "US") %>%
  group_by(Country_Region, Date) %>%
  # across() replaces the superseded summarise_at(); .groups = "drop" returns
  # an ungrouped table instead of one still grouped by Country_Region
  summarise(across(c(Confirmed, Deaths), sum), .groups = "drop")
p <- ggplot(data = US_deaths, aes(x = Date, y = Deaths)) +
  geom_point() +
  geom_line() +
  ggtitle("US COVID-19 Deaths")
ggplotly(p)
## Animated Graphs with gganimate
# Use the black-and-white theme for all plots created from here on
theme_set(theme_bw())
# Per-date Confirmed and Deaths sums for the five selected countries.
data_time <- time_series_long_joined %>%
  # Filter first so only the selected countries are aggregated
  filter(Country_Region %in% c("China", "Korea, South", "Japan", "Italy", "US")) %>%
  group_by(Country_Region, Date) %>%
  # across() replaces the superseded summarise_at(); .groups = "drop" returns
  # an ungrouped table instead of one still grouped by Country_Region
  summarise(across(c(Confirmed, Deaths), sum), .groups = "drop")
p <- ggplot(data_time, aes(x = Date, y = Confirmed, color = Country_Region)) +
  geom_point() +
  geom_line() +
  ggtitle("Confirmed COVID-19 Cases") +
  # Giving every row its own group keeps already-revealed points on screen
  geom_point(aes(group = seq_along(Date))) +
  # Reveal the data progressively along the Date axis
  transition_reveal(Date)
# Some people needed to use this line instead
animate(p, renderer = gifski_renderer(), end_pause = 15)
# animate(p, end_pause = 15)
Challenge 1: Print a graph (different from the one above) to a PNG file, using 3*ppi for the height and width, and display the PNG file in the report using the R Markdown format shown above.
# Plot US confirmed cases and deaths over time to a PNG output file
# Pixels per inch: the device size below is 3 x 3 inches at this resolution
ppi <- 300
png("images/confirmed_vs_deaths_graph.png", width = 3 * ppi, height = 3 * ppi, res = ppi)
# A ggplot object is only drawn when it is printed. Print it explicitly so the
# file is not left empty when this code runs without top-level auto-printing
# (e.g. via source() or inside a function).
print(
  time_series_long_joined_counts %>%
    filter(Country_Region == "US") %>%
    ggplot(aes(x = Date, y = Counts)) +
    # One line per report type, coloured by report type
    geom_line(aes(group = Report_Type, color = Report_Type)) +
    labs(title = "COVID-19 Cases vs Deaths in US")
)
# Close the PNG device so the file is written out
dev.off()
US COVID-19 Cases vs Deaths
Challenge 2: Turn one of the exercises from Lab 5 into an interactive graph with plotly.
# Load the US confirmed-case table from the local data directory
time_series_confirmed_US <- read_csv("data/time_series_covid19_confirmed_US.csv")
## Parsed with column specification:
## cols(
## .default = col_double(),
## iso2 = col_character(),
## iso3 = col_character(),
## Admin2 = col_character(),
## Province_State = col_character(),
## Country_Region = col_character(),
## Combined_Key = col_character()
## )
## See spec(...) for full column specifications.
# Total confirmed count per state and date.
us_confirmed_df <- time_series_confirmed_US %>%
  # Stack every column after the first 11 into Date / Confirmed pairs.
  # NOTE(review): assumes the first 11 columns are the non-date metadata
  # columns - confirm against the file layout.
  pivot_longer(
    cols = -(1:11),
    names_to = "Date",
    values_to = "Confirmed"
  ) %>%
  group_by(Province_State, Date) %>%
  # .groups = "drop" returns an ungrouped table; previously the result stayed
  # grouped by Province_State and summarise() emitted a regrouping message
  summarise(Total_Confirmed = sum(Confirmed), .groups = "drop")
## `summarise()` regrouping output by 'Province_State' (override with `.groups` argument)
# Parse the month/day/year strings in Date into Date objects
us_confirmed_df <- us_confirmed_df %>%
  mutate(Date = mdy(Date))
#datatable(us_confirmed_df)
# Line chart of Total_Confirmed over Date, one panel per Province_State, laid
# out in four columns with an independent y-axis for each panel.
my_plot <- ggplot(us_confirmed_df, aes(x = Date, y = Total_Confirmed)) +
  geom_line(
    aes(group = as.factor(Province_State), color = Province_State),
    size = 1,
    show.legend = FALSE
  ) +
  facet_wrap(~Province_State, ncol = 4, scales = "free_y") +
  theme_classic() +
  theme(
    text = element_text(size = 11, colour = "black"),
    line = element_line(size = 1, colour = "black")
  )
# Convert to an interactive plotly widget at a fixed size and hide its legend
my_plot %>%
  ggplotly(width = 1000, height = 1500) %>%
  hide_legend()
## Warning: `group_by_()` is deprecated as of dplyr 0.7.0.
## Please use `group_by()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
Challenge 3: Create an animated graph of your choosing using the time series data to display an aspect (e.g. states or countries) of the data that is important to you.
# Share of worldwide confirmed cases and deaths found at each latitude.
# NOTE(review): per the CSSE dataset documentation, the time-series counts are
# running (cumulative) totals, so adding them up across every date counts the
# same cases repeatedly. Only the most recent date is used here so the shares
# are true percentages of the totals - confirm this matches the intent.
latest_counts <- time_series_long_joined %>%
  filter(Date == max(Date, na.rm = TRUE))
# na.rm = TRUE: the full join can leave NA where a location/date is present in
# only one of the two source tables, which would turn every total into NA.
tot_confirmed_sum <- sum(latest_counts$Confirmed, na.rm = TRUE)
tot_deaths_sum <- sum(latest_counts$Deaths, na.rm = TRUE)
Lat_plot_df <- latest_counts %>%
  # Rows without a latitude cannot be placed on the latitude axis
  filter(!is.na(Lat)) %>%
  group_by(Lat) %>%
  summarise(
    sum_Confirmed = sum(Confirmed, na.rm = TRUE) / tot_confirmed_sum * 100,
    sum_Death = sum(Deaths, na.rm = TRUE) / tot_deaths_sum * 100,
    .groups = "drop"
  )
## `summarise()` ungrouping output (override with `.groups` argument)
#pivot_longer(cols = c(sum_Confirmed, sum_Death), names_to="Type", values_to="Number")
#Long_plot_df <-time_series_long_joined%>%
# group_by(Long) %>%
# summarise(sum_Confirmed= sum(Confirmed), sum_Death= sum(Deaths)) #%>%
#pivot_longer(cols = c(sum_Confirmed, sum_Death), names_to="Type", values_to="Number")
# Animated line chart of the confirmed-case percentage (sum_Confirmed) against
# latitude, revealed progressively along the latitude axis.
Lat_plot <- ggplot(Lat_plot_df, aes(x = Lat, y = sum_Confirmed)) +
  # geom_line(data = Long_plot_df, aes(x = Long, y = sum_Confirmed), color = "red", show.legend = TRUE) +
  geom_line(color = "blue", show.legend = TRUE) +
  labs(
    title = "Percentage of Total COVID-19 Cases Closer To Equator Is Relatively Low",
    x = "Latitude",
    y = "Percentage of Total Cases"
  ) +
  theme_classic() +
  transition_reveal(Lat)
# Render as a GIF, holding the final frame for 15 extra frames
animate(Lat_plot, renderer = gifski_renderer(), end_pause = 15)